Loading one-to-one orthologs between P. barbatus and C. floridanus
path_to_pogodata <- paste0(path_to_repo,"/data/gordon_data/")
pogo.cflo <-
read.csv(paste0(path_to_pogodata, "pogo_cflo_orthologs.csv"),
header = T, stringsAsFactors = F, na.strings = c(""," ",".", "NA")) %>%
as_tibble() %>%
select(cflo_gene,pogo_gene) %>%
distinct()
# make a function that takes pogo names and returns cflo names
pogo_to_cflo <- function(geneset) {
cflo_genes <-
pogo.cflo %>%
filter(pogo_gene %in% geneset) %>%
pull(cflo_gene) %>%
unique() %>%
as.character()
return(cflo_genes)
}
# make a function that takes pogo names and returns cflo names
cflo_to_pogo <- function(geneset) {
pogo_genes <-
pogo.cflo %>%
filter(cflo_gene %in% geneset) %>%
pull(pogo_gene) %>%
unique() %>%
as.character()
return(pogo_genes)
}
# SAMPLE NAME
## specify sample name
sample.name <- c("pbar_ld","pbar_dd")
# eJTK OUTPUT
## Set GammaP threshold below which genes are classified as rhythmic
gamma.pval = 0.05
## Number of genes in the Pbar genome
nGenes = 12557
## Initialize list to save genes of interest in
goi.list <- list()
# LOAD DATABASES (TC9)
## 1. TC9_ejtk.db
# writeLines("Loading 'TC9_ejtk' database that contains all ejtk-output for TC9")
ejtk.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_repo, "/data/databases/TC9_ejtk.db"))
# which tables are in the database
src_dbi(ejtk.db)
## src: sqlite 3.41.2 [/Users/bidas/Documents/05_postdoc/Deborah_Gordon/04_research/2022/04_timecourse_RNASeq/data/databases/TC9_ejtk.db]
## tbls: pbar_dd_zscores_08h, pbar_dd_zscores_12h, pbar_dd_zscores_24h,
## pbar_ld_zscores_08h, pbar_ld_zscores_12h, pbar_ld_zscores_24h
## 2. TC9_data.db
# writeLines("Loading 'TC9_data' database that contains all expression data for TC9")
data.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_repo, "/data/databases/TC9_data.db"))
src_dbi(data.db)
## src: sqlite 3.41.2 [/Users/bidas/Documents/05_postdoc/Deborah_Gordon/04_research/2022/04_timecourse_RNASeq/data/databases/TC9_data.db]
## tbls: pbar_dd_expressed_genes, pbar_dd_fpkm, pbar_dd_log2fpkm, pbar_dd_zscores,
## pbar_ld_expressed_genes, pbar_ld_fpkm, pbar_ld_log2fpkm, pbar_ld_zscores
dat.fpkm <- list(
ld = tbl(data.db, paste0(sample.name[1],"_fpkm")) %>% collect(),
dd = tbl(data.db, paste0(sample.name[2],"_fpkm")) %>% collect()
)
Using time-course RNASeq data from LD forager heads:
not.expressed <- list()
for (i in 1:length(sample.name)) {
# which genes have zero expression at all time points
not.expressed[[i]] <-
tbl(data.db, paste0(sample.name[i],"_fpkm")) %>%
collect() %>%
filter_at(vars(starts_with("Z")), all_vars(. == 0)) %>%
pull(gene_name)
writeLines(paste0("How many genes have zero expression in ", sample.name[i]," at all time points?"))
length(not.expressed[[i]]) %>% print()
}
## How many genes have zero expression in pbar_ld at all time points?
## [1] 233
## How many genes have zero expression in pbar_dd at all time points?
## [1] 321
### Check to see if the analyses worked. All values should be zero
for (i in 1:length(sample.name)) {
writeLines(paste0("Not-expressed genes in ", sample.name[i], "; first two rows"))
dat.fpkm[[i]] %>%
filter(gene_name %in% not.expressed[[i]]) %>%
head(2) %>% print()
writeLines(paste0("Not-expressed genes in ", sample.name[i], "; last two rows\n"))
dat.fpkm[[i]] %>%
filter(gene_name %in% not.expressed[[i]]) %>%
tail(2) %>% print()
}
## Not-expressed genes in pbar_ld; first two rows
## # A tibble: 2 × 13
## gene_name ZTLD01 ZTLD03 ZTLD05 ZTLD07 ZTLD09 ZTLD11 ZTLD13 ZTLD15 ZTLD17
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOC105421983 0 0 0 0 0 0 0 0 0
## 2 LOC105421988 0 0 0 0 0 0 0 0 0
## # ℹ 3 more variables: ZTLD19 <dbl>, ZTLD21 <dbl>, ZTLD23 <dbl>
## Not-expressed genes in pbar_ld; last two rows
##
## # A tibble: 2 × 13
## gene_name ZTLD01 ZTLD03 ZTLD05 ZTLD07 ZTLD09 ZTLD11 ZTLD13 ZTLD15 ZTLD17
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOC112553082 0 0 0 0 0 0 0 0 0
## 2 LOC112553095 0 0 0 0 0 0 0 0 0
## # ℹ 3 more variables: ZTLD19 <dbl>, ZTLD21 <dbl>, ZTLD23 <dbl>
## Not-expressed genes in pbar_dd; first two rows
## # A tibble: 2 × 13
## gene_name ZTDD01 ZTDD03 ZTDD05 ZTDD07 ZTDD09 ZTDD11 ZTDD13 ZTDD15 ZTDD17
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOC105421983 0 0 0 0 0 0 0 0 0
## 2 LOC105421994 0 0 0 0 0 0 0 0 0
## # ℹ 3 more variables: ZTDD19 <dbl>, ZTDD21 <dbl>, ZTDD23 <dbl>
## Not-expressed genes in pbar_dd; last two rows
##
## # A tibble: 2 × 13
## gene_name ZTDD01 ZTDD03 ZTDD05 ZTDD07 ZTDD09 ZTDD11 ZTDD13 ZTDD15 ZTDD17
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOC112553069 0 0 0 0 0 0 0 0 0
## 2 LOC112553095 0 0 0 0 0 0 0 0 0
## # ℹ 3 more variables: ZTDD19 <dbl>, ZTDD21 <dbl>, ZTDD23 <dbl>
### Gene set of interest 1 ###
gsoi.1 <- list()
# names(gsoi.1) <- "not_expressed_genes"
## loop STARTS
for (i in 1:length(sample.name)) {
writeLines(paste("running functional enrichment analyses for NOT-EXPRESSED genes in", sample.name[[i]]))
genes <- not.expressed[[i]]
# run enrichment
genes %>%
check_enrichment2(.,
org = "pbar",
what = "pfams",
bg = 'all',
expand = T) %>%
# save the results to a file
arrange(annot_desc) -> gsoi.1[[i]]
# MAKE THE SUPP FILE
foo <-
pbar_annots %>%
filter(gene_name %in% genes) %>%
left_join(gsoi.1[[i]][,c(1,3)], by="gene_name") %>%
select(gene_name, gene_desc, enriched_annot=annot_desc, everything()) %>%
arrange(enriched_annot) %>%
distinct()
gsoi.1[[i]] <- foo
# print()
}
## running functional enrichment analyses for NOT-EXPRESSED genes in pbar_ld
## [1] "Loading annotation file for Pogonomyrmex barbatus"
## [1] "Done."
## [1] "Testing for enrichment..."
## running functional enrichment analyses for NOT-EXPRESSED genes in pbar_dd
## [1] "Loading annotation file for Pogonomyrmex barbatus"
## [1] "Done."
## [1] "Testing for enrichment..."
## loop ENDS
## Save results of not_expressed genes into an excel file
# supp <- list(
# "not_expressed_pbar_ld" = gsoi.1[[1]],
# "not_expressed_pbar_dd" = gsoi.1[[2]])
# writexl::write_xlsx(supp,
# path = paste0(path_to_repo,
# "/results/00_supplementary_files/01_gsoi_not_expressed_LD_v_DD.xlsx"))
expressed <- list()
for (i in 1:length(sample.name)) {
# which genes are expressed in ant heads?
expressed[[i]] <-
tbl(data.db, paste0(sample.name[i],"_expressed_genes")) %>%
filter(expressed=="yes") %>%
collect() %>%
pull(gene_name)
writeLines(paste0("How many genes are expressed in ", sample.name[i] ," heads?"))
length(expressed[[i]]) %>% print()
}
## How many genes are expressed in pbar_ld heads?
## [1] 10906
## How many genes are expressed in pbar_dd heads?
## [1] 10537
Check to see if these expressed genes are the same for LD and DD foragers
fishers_overlap(set.1 = expressed[[1]],
set.2 = expressed[[2]],
bg=nGenes) # background = all genes in pbar genome
## Contingency table:
##
## in.set.2 not.set.2
## in.set.1 10446 460
## not.set.1 91 1560
## Running fisher.test() on contingency table:
##
##
## Fisher's Exact Test for Count Data
##
## data: test.table
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
## 321.3235 Inf
## sample estimates:
## odds ratio
## 388.7773
Which genes/processes are expressed during both LD and DD?
# example of list input (list of named vectors)
listInput <- expressed
names(listInput) <- sample.name
library(UpSetR)
library(viridis)
# caste.col <- c("#F23030","#1A80D9")
upset.plot <-
upset(fromList(listInput),
number.angles = 0, point.size = 3, line.size = 1.5,
mainbar.y.label = "Number of overlapping genes",
sets.x.label = "Expressed genes",
text.scale = c(1.5, # y-axis label ("# overlapping genes")
2, # y-axis tick labels ("1000, 2000,..")
1.5, # label for histogram ("sig. rhy genes")
1, # tick labels for histogram
1.5, # set names ("Cflo-brain_08h,..")
1.5),
sets = names(listInput),
keep.order = T,
sets.bar.color = viridis(1),
# adding queries
query.legend = "bottom")
# plot it
print(upset.plot)
#
# dev.off()
writeLines("Which genes are expressed in LD but have no expression in DD?")
## Which genes are expressed in LD but have no expression in DD?
repressed.in.dd <-
intersect(expressed[[1]],
not.expressed[[2]]) %>% unique()
pbar_annots %>%
filter(gene_name %in% repressed.in.dd) %>%
left_join(dat.fpkm[[1]], by="gene_name") %>%
left_join(dat.fpkm[[2]], by="gene_name") %>%
DT::datatable()
repressed.in.dd %>%
# exp.plot(log=T) %>%
stacked.zplot(text_size = 35) %>%
multi.plot(rows = 2, cols=1)
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
writeLines("Let's look at the temporal expression of genes that are expressed during LD but not DD")
## Let's look at the temporal expression of genes that are expressed during LD but not DD
setdiff(expressed[[1]],
expressed[[2]]) %>%
stacked.zplot(text_size = 35) %>%
multi.plot(rows = 2, cols=1)
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
writeLines("What are the functions of these genes?")
## What are the functions of these genes?
setdiff(expressed[[1]],expressed[[2]]) %>%
check_enrichment2(org = "pbar",
what = "pfams",
bg = "all",
expand = T,
verbose = T) %>%
arrange(annot_desc, gene_name) %>%
DT::datatable()
## [1] "Loading annotation file for Pogonomyrmex barbatus"
## [1] "Done."
## Number of genes in background geneset: 11348
## Number of genes in the test set: 460
## --------------------------------
## Number of pfams terms in background geneset: 4575
## Number of pfams terms (at least 5genes) in background geneset: 569
## Number of pfams terms (at least 5 genes) in test set: 6
## [1] "Testing for enrichment..."
writeLines("Odorant receptor coreceptor: LOC105424270")
## Odorant receptor coreceptor: LOC105424270
"LOC105424270" %>%
exp.plot(log=F)
## [[1]]
dat.fpkm[[1]]
## # A tibble: 12,557 × 13
## gene_name ZTLD01 ZTLD03 ZTLD05 ZTLD07 ZTLD09 ZTLD11 ZTLD13 ZTLD15
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOC105421953 44.5 20.8 51.3 34.3 29.4 42.1 39.6 32.5
## 2 LOC105421955 9.39 8.98 7.98 8.32 7.60 6.80 10.0 7.28
## 3 LOC105421956 3.30 3.13 4.73 3.74 3.23 3.72 3.12 3.63
## 4 LOC105421957 0.0510 0.373 0.0585 0.136 0.138 0 0.217 0.105
## 5 LOC105421958 11.7 8.17 11.6 11.2 11.2 8.06 12.9 10.2
## 6 LOC105421959 24.9 22.5 27.1 22.6 24.2 22.4 25.2 23.8
## 7 LOC105421960 45.3 43.4 43.8 43.8 39.2 43.5 41.0 37.9
## 8 LOC105421961 241. 191. 215. 203. 171. 173. 184. 156.
## 9 LOC105421962 28.3 30.4 30.5 38.1 37.7 36.4 29.4 33.3
## 10 LOC105421963 32.6 29.5 37.3 35.0 30.2 33.5 26.1 29.9
## # ℹ 12,547 more rows
## # ℹ 4 more variables: ZTLD17 <dbl>, ZTLD19 <dbl>, ZTLD21 <dbl>, ZTLD23 <dbl>
writeLines("Let's take a look at the expression and function of genes that are expressed during constant dark but not LD")
## Let's take a look at the expression and function of genes that are expressed during constant dark but not LD
setdiff(expressed[[2]],expressed[[1]]) %>%
stacked.zplot(bg.alpha = .2) %>%
multi.plot(rows = 2, cols=1)
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
setdiff(expressed[[2]],expressed[[1]]) %>%
check_enrichment2(org = "pbar",
what = "pfams",
bg = "all",
expand = T,
verbose = T)
## [1] "Loading annotation file for Pogonomyrmex barbatus"
## [1] "Done."
## Number of genes in background geneset: 11348
## Number of genes in the test set: 91
## --------------------------------
## Number of pfams terms in background geneset: 4575
## Number of pfams terms (at least 5genes) in background geneset: 569
## Number of pfams terms (at least 5 genes) in test set: 1
## [1] "Testing for enrichment..."
## # A tibble: 39 × 3
## # Groups: gene_name [35]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 LOC105422457 uncharacterized protein LOC105422457 no_desc
## 2 LOC105422664 histone-lysine N-methyltransferase SETMAR-like no_desc
## 3 LOC105423520 uncharacterized protein LOC105423520 no_desc
## 4 LOC105423544 uncharacterized protein LOC105423544 no_desc
## 5 LOC105423917 kynurenine/alpha-aminoadipate aminotransferase, mito… no_desc
## 6 LOC105424031 LOW QUALITY PROTEIN: uncharacterized protein LOC1054… no_desc
## 7 LOC105424186 LOW QUALITY PROTEIN: saccharopine dehydrogenase-like… no_desc
## 8 LOC105424760 uncharacterized protein LOC105424760 no_desc
## 9 LOC105424948 uncharacterized protein LOC105424948 no_desc
## 10 LOC105425195 uncharacterized protein LOC105425195 no_desc
## # ℹ 29 more rows
Summary of the results:
General patterns of gene expression in Pbar forager brains in LD and DD
Differences in LD and DD
460 genes that were expressed only in LD, but not in DD, show a synchronized peak around ZT 3 (three hours after lights were turned on), which is lost during constant darkness.
Intriguingly, these 460 genes were primarily involved in olfaction and odorant binding, and included a copy of the odorant receptor co-receptor (LOC105424270) which is an essential component of olfactory pathways in ants (refer to orco work from Rockfeller/Kronaeur lab).
Odorant receptor genes were also overrepresented in the set of genes with no expression in the brains of pbar foragers collected in LD (~5% of 273 genes). However, the number of odorant binding genes that do not show any expression is relatively higher in constant darkness (~20% of 273 genes).
Taken together, it seems that odorant receptor genes [sensory perception of smell, odorant binding, olfactory receptor activity] that usually are expressed in forager brains under LD cycle show either a loss of expression (FPKM = 0) or a reduced expresssion (0 < FPKM < 1) under constant dark conditions.
rhy.24 <- list()
rhy.12 <- list()
rhy.08 <- list()
rhy.genes <- list()
for (i in 1:length(sample.name)) {
## Load all the rhythmic genesets
## Note, ordered according to their p-value; highly rhythmic at the top.
# Circadian genes (period = 24h)
## get the gene-names for sig. 24h-rhythmic genes
rhy.24[[i]] <-
tbl(ejtk.db, paste0(sample.name[i],"_zscores_24h")) %>%
filter(GammaP < gamma.pval) %>%
select(ID, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(ID) %>% pull()
# Ultradian genes (period = 12h)
## get the gene-names for sig. 12h-rhythmic genes
rhy.12[[i]] <-
tbl(ejtk.db, paste0(sample.name[i],"_zscores_12h")) %>%
filter(GammaP < gamma.pval) %>%
select(ID, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(ID) %>% pull()
# Ultradian genes (period = 08h)
## get the gene-names for sig. 08h-rhythmic genes
rhy.08[[i]] <-
tbl(ejtk.db, paste0(sample.name[i],"_zscores_08h")) %>%
filter(GammaP < gamma.pval) %>%
select(ID, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(ID) %>% pull()
writeLines(paste0("How many sig. rhythmic genes are there in ",
sample.name[i]," brains? (GammaP < ",gamma.pval, ")"))
rhy.genes[[i]] <- list(rhy.24[[i]],rhy.12[[i]],rhy.08[[i]])
names(rhy.genes[[i]]) <- paste0(sample.name[i], c("_24h", "_12h", "_08h"))
names(rhy.genes)[i] <- sample.name[[i]]
print(sapply(rhy.genes[[i]],length))
}
## How many sig. rhythmic genes are there in pbar_ld brains? (GammaP < 0.05)
## pbar_ld_24h pbar_ld_12h pbar_ld_08h
## 1536 179 354
## How many sig. rhythmic genes are there in pbar_dd brains? (GammaP < 0.05)
## pbar_dd_24h pbar_dd_12h pbar_dd_08h
## 1547 341 717
## load zscore dataset
zscore.dat <- list()
for (i in 1:length(sample.name)){
zscore.dat[[i]] <- data.db %>% tbl(., paste0(sample.name[i],"_zscores")) %>% collect()
}
goi.list[[1]] <- rhy.genes
names(goi.list) <- "rhy_genes"
Already loaded; variable names: rhy.24h, rhy.12h, rhy.08h
Let’s load rhythmic genes for forager brains (from TC5)
# specify path to TC5 data
path_to_repo_TC5 = "~/Documents/GitHub/Das_et_al_2021"
# load the TC5 database
tc5.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_repo_TC5, "/data/TC5_data.db"))
## Get the zscore data for forager brains
zscore.brains <- tc5.db %>% tbl(., "zscore_for") %>% collect()
## save the rhythmic genes identified in forager brains
# 24h
for24.TC5 <-
tbl(tc5.db, "ejtk_all") %>%
filter(caste == "for" & rhy == "yes") %>%
select(gene_name, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(gene_name) %>% pull()
# 12h
for12.TC5 <-
tbl(tc5.db, "ejtk_12h_all") %>%
filter(caste == "for" & rhy == "yes") %>%
select(gene_name, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(gene_name) %>% pull()
# 08h
for08.TC5 <-
tbl(tc5.db, "ejtk_8h_all") %>%
filter(caste == "for" & rhy == "yes") %>%
select(gene_name, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(gene_name) %>% pull()
writeLines("How many genes were identified as sig. rhythmic in forager brains? (TC5)")
## How many genes were identified as sig. rhythmic in forager brains? (TC5)
rhy.genes.tc5 <- list(for24.TC5, for12.TC5, for08.TC5)
names(rhy.genes.tc5) <- paste0("Cflo-brain", c("_24h", "_12h", "_08h"))
sapply(rhy.genes.tc5,length)
## Cflo-brain_24h Cflo-brain_12h Cflo-brain_08h
## 3569 148 229
sapply(rhy.genes[[1]], length)
## pbar_ld_24h pbar_ld_12h pbar_ld_08h
## 1536 179 354
# example of list input (list of named vectors)
listInput <- list(rhy.24[[1]],rhy.12[[1]],rhy.08[[1]],
rhy.24[[2]],rhy.12[[2]],rhy.08[[2]])
names(listInput) <- c(paste0("Pbar-LD", c("_24h","_12h","_08h")),
paste0("Pbar-DD", c("_24h","_12h","_08h")))
Circadian + Ultradian rhythms
library(UpSetR)
library(viridis)
# caste.col <- c("#F23030","#1A80D9")
upset(fromList(listInput),
number.angles = 0, point.size = 3, line.size = 1.5,
mainbar.y.label = "Number of overlapping genes",
sets.x.label = "Sig. rhy genes",
text.scale = c(1.5, # y-axis label ("# overlapping genes")
2, # y-axis tick labels ("1000, 2000,..")
1.5, # label for histogram ("sig. rhy genes")
1, # tick labels for histogram
1.5, # set names ("Cflo-brain_08h,..")
1.5),
sets = names(listInput),
nintersects = 15,
keep.order = T,
sets.bar.color = viridis(1),
# adding queries
query.legend = "bottom"
)
Circadian only
listInput2 <- list(rhy.24[[1]], rhy.24[[2]])
names(listInput2) <- c(paste0("Pbar-LD", c("_24h")), paste0("Pbar-DD", c("_24h")))
upset(fromList(listInput2),
number.angles = 0, point.size = 3, line.size = 1.5,
mainbar.y.label = "Number of overlapping genes",
sets.x.label = "Sig. rhy genes",
text.scale = c(1.5, # y-axis label ("# overlapping genes")
2, # y-axis tick labels ("1000, 2000,..")
1.5, # label for histogram ("sig. rhy genes")
1, # tick labels for histogram
1.5, # set names ("Cflo-brain_08h,..")
1.5),
sets = names(listInput2),
nintersects = 15,
keep.order = T,
sets.bar.color = viridis(1),
# adding queries
query.legend = "bottom"
)
## Let's plot the heatmaps to tease apart the patterns
goi <- intersect(rhy.24[[1]], rhy.24[[2]])
# make a list to save the cluster information
my_gene_col <- list()
# Filter the zscores to keep only the genes of interest
zscore.dummy <-
zscore.dat[[1]] %>%
# Keep only sig. 24h-rhythmic genes in control Cflo heads
filter(gene_name %in% goi) %>%
## Concat. data from ophio- and beau-infected Cflo heads
left_join(zscore.dat[[2]], by="gene_name") %>%
as.data.frame()
# Set genes as rownames and convert it into a matrix
rownames(zscore.dummy) = zscore.dummy$gene_name
zscore.dummy <- as.matrix(zscore.dummy[-1])
# Hierarchical clustering the geneset
my_hclust_gene <- hclust(dist(zscore.dummy), method = "complete")
# my_hclust_gene <- hclust(dist(zscore.rhy), method = "complete")
# Make annotations for the heatmaps
n_clusters <- 4
my_gene_col <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # four clusters
my_gene_col <- data.frame(cluster = my_gene_col)
# I’ll add some column annotations and create the heatmap.
# Annotations for:
# 1. Is the sample collected during the light or dark phase?
my_sample_col <-
data.frame(phase = rep(rep(c("light", "dark"), c(6,6)),2),
conds = rep(c("pbar_ld","pbar_dd"), each=12))
row.names(my_sample_col) <- colnames(zscore.dummy)
# Manual color palette
my_colour = list(
phase = c(light = "#F2E18D", dark = "#010440"),
conds = c(pbar_ld = "#AD212F", pbar_dd = "#5A829F"),
cluster = viridis::cividis(100)[c(seq(10,90,by=round(80/(n_clusters), 0)))])
# Color scale
my.breaks = seq(min(na.omit(zscore.dummy)),
max(na.omit(zscore.dummy)), by=0.06)
# my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
# colnames(zscore.dummy) <- c(rep(c("","",6,"","",12,
# "","",18,"","",24),2))
# Let's plot!
pheatmap(zscore.dummy, show_rownames = F,
show_colnames = F,
# angle_col = c("0"),
annotation_row = my_gene_col,
annotation_col = my_sample_col,
# cutree_rows = 4,
gaps_row = c(12),
# cutree_cols = 3,
gaps_col = c(12,24),
annotation_colors = my_colour,
border_color=FALSE,
cluster_rows = T,
cluster_cols = F,
breaks = my.breaks,
## color scheme borrowed from:
color = inferno(length(my.breaks) - 1),
treeheight_row = 0,
# treeheight_col = 0,
# remove the color scale or not
main = paste0("24h-rhythmic in \nLD and DD",
"\n n(genes) = ",
length(rownames(zscore.dummy))),
## annotation legend
annotation_legend = T,
## Color scale
legend = T)
Let’s look at the expression of genes in Cluster 1 and 2 from the above heatmap
## Let's look Cluster 1
for (i in 1:2){
writeLines(paste0("Plotting for cluster: ", i))
my_gene_col %>%
rownames_to_column("g") %>%
filter(cluster==i) %>%
pull(g) %>%
stacked.zplot() %>%
multi.plot(rows=2,cols=1) %>%
print()
writeLines(paste0("Plotting amplitudes.."))
my_gene_col %>%
rownames_to_column("g") %>%
filter(cluster==i) %>%
pull(g) %>%
amplitude.plot(stats = T) %>%
print()
}
## Plotting for cluster: 1
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## Plotting amplitudes..
##
## Kruskal wallis test found significant differences in means (q<0.05)
##
##
## Summary stats:
##
## set mean_amp sd se ci
## 1 pbar_ld 2.956712 0.2793328 0.01637478 0.03222848
## 2 pbar_dd 3.261930 0.2824101 0.01655517 0.03258353
##
## Performing pairwise Wilcox test...
## # A tibble: 1 × 6
## .y. group1 group2 p.adj p.format method
## <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 amp pbar_ld pbar_dd 9.40e-33 <2e-16 Wilcoxon
## Plotting for cluster: 2
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## Plotting amplitudes..
##
## Kruskal wallis test did not find any evidence for difference in means (q > 0.05)
rhy.24[[1]] |> length()
## [1] 1536
rhy.24[[2]] |> length()
## [1] 1547
intersect(rhy.24[[1]], rhy.24[[2]]) |>
check_enrichment2(org = "pbar",
what = "pfams",
bg = "all",
expand = T,
verbose = T)
## [1] "Loading annotation file for Pogonomyrmex barbatus"
## [1] "Done."
## Number of genes in background geneset: 11348
## Number of genes in the test set: 345
## --------------------------------
## Number of pfams terms in background geneset: 4575
## Number of pfams terms (at least 5genes) in background geneset: 569
## Number of pfams terms (at least 5 genes) in test set: 11
## [1] "Testing for enrichment..."
## # A tibble: 66 × 3
## # Groups: gene_name [36]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 LOC105422056 arf-GAP with coiled-coil, ANK repeat and PH domain-c… PH domain
## 2 LOC105422056 arf-GAP with coiled-coil, ANK repeat and PH domain-c… PH domain
## 3 LOC105422056 arf-GAP with coiled-coil, ANK repeat and PH domain-c… PH domain
## 4 LOC105422314 transient receptor potential channel pyrexia-like Ankyrin r…
## 5 LOC105422894 uncharacterized LOC105422894 isoform X1 Protein t…
## 6 LOC105422894 uncharacterized LOC105422894 precursor Protein t…
## 7 LOC105423197 pleckstrin homology domain-containing family M membe… PH domain
## 8 LOC105423260 cardioacceleratory peptide receptor 7 transme…
## 9 LOC105423379 GA-binding protein subunit beta-2 Ankyrin r…
## 10 LOC105424294 LOW QUALITY PROTEIN: oxysterol-binding protein-relat… PH domain
## # ℹ 56 more rows
pbar_annots |>
filter(
gene_name %in% intersect(rhy.24[[1]], rhy.24[[2]])
) |>
distinct() |>
DT::datatable()
Rhy genes: Pbar-LD vs. Pbar-DD
## MAKE YOUR LIST OF GENES OF INTEREST ##
# LIST ONE - control rhy genes
list1 <- rhy.genes[[1]]
names(list1) <- paste0("Pbar_LD", c("_24h","_12h","_08h"))
## LIST TWO - ophio rhythmic genes
list2 <- rhy.genes[[2]]
names(list2) <- paste0("Pbar_DD", c("_24h","_12h","_08h"))
# ## LIST FOUR - forager brains rhy genes
# list4 <- rhy.genes.tc5
# names(list4) <- paste0("Cflo-brain", c("_24h","_12h","_08h"))
## CHECK FOR OVERLAP
library(GeneOverlap)
# how many genes are expressed in Pogo forager brains
nGenesExp = expressed %>% unlist() %>% unique() %>% length()
## make a GOM object
gom.1v2 <- newGOM(list1, list2, genome.size = nGenesExp)
writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
drawHeatmap(gom.1v2,
adj.p=T,
cutoff=0.05,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "grey80")
Rhy genes: Pbar-LD vs. Cflo-LD
## MAKE YOUR LIST OF GENES OF INTEREST ##
# LIST ONE - control rhy genes
list1 <- rhy.genes[[1]]
names(list1) <- paste0("Pbar_LD", c("_24h","_12h","_08h"))
## LIST TWO - ophio rhythmic genes
list2.cflo <- sapply(rhy.genes.tc5, cflo_to_pogo)
names(list2.cflo) <- paste0("Cflo_LD", c("_24h","_12h","_08h"))
## CHECK FOR OVERLAP
library(GeneOverlap)
# how many genes are expressed in Pogo forager brains
nGenesExp = expressed %>% unlist() %>% unique() %>% length()
## make a GOM object
gom.1v2.cflo <- newGOM(list1, list2.cflo, genome.size = nGenesExp)
writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
drawHeatmap(gom.1v2.cflo,
adj.p=T,
cutoff=0.05,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "grey80")
Get a summary for the rhythmic properties of each geneset
my.colors <- c(pbar_ld = "#AD212F", pbar_dd = "#5A829F")
rhy.24.sig <- list()
phase.ejtk <- list()
# Obtain the phases of 24h-rhythmic genes in controls v. ophio-inf v. beau-inf ants
for (i in 1:length(sample.name)) {
rhy.24.sig[[i]] <-
tbl(ejtk.db, paste0(sample.name[i],"_zscores_24h")) %>%
filter(GammaP < gamma.pval) %>%
collect()
# Get the phases of the best matched waveforms
phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$Phase,
units="hours", template="clock24")
}
# Circular stats - tests --------------------------------------------------
# save all the circular phases in a list
l.phases <- phase.ejtk
# let's name the list elements for later use and reference
names(l.phases) <- names(my.colors)
# Watson Test -------------------------------------------------------------
# A Watson Test determines if two groups’ orientations are significantly different from each other.
# Here, it will allow us to test whether the mean phase predicted by eJTK and RAIN
# are significiantly different or not.
# ?watson.two.test #help file on this statistical test
writeLines("Performing Watson test to check if the average peak of 24h-rhythms has significantly shifted during constant dark conditions")
## Performing Watson test to check if the average peak of 24h-rhythms has significantly shifted during constant dark conditions
# For all rhy genes
ld.dd <- watson.two.test(l.phases[[1]],l.phases[[2]], alpha = 0.001)
ld.dd %>% print()
##
## Watson's Two-Sample Test of Homogeneity
##
## Test Statistic: 4.906
## Level 0.001 Critical Value: 0.385
## Reject Null Hypothesis
# Initialize a list for saving the ggplots
g <- list()
means <- as.numeric(lapply(phase.ejtk, mean))
means <- circular(means, units="hours", template="clock24")
for(i in 1:length(l.phases)) {
# define phase levels
ordered_phases <- c("2","4","6","8","10","12",
"14","16","18","20","22","24")
df.test <- l.phases[[i]] %>%
as.data.frame() %>%
mutate(phase = x) %>%
mutate(phase = replace(phase, x=="0", "24")) %>%
select(-x) %>%
group_by(phase = factor(phase, levels = ordered_phases)) %>%
summarise(n_genes = n())
m <- as.numeric(means[i])
g[[i]] <-
ggplot(df.test, aes(x=factor(phase), y=n_genes)) +
# indicate light-dark phase
geom_rect(aes(xmin = as.factor(12), xmax = as.factor(24),
ymin = -Inf, ymax = Inf),
fill = "grey80", alpha = 0.05, color=NA) +
geom_bar(stat='identity', fill=my.colors[[i]]) +
xlab(c(names(my.colors)[i])) +
scale_y_continuous(breaks = c(0,200,400), limits = c(0,450)) +
coord_polar() +
theme_bw() +
theme(text = element_text(size = 15, colour = 'black'),
axis.title.y=element_blank(),
# axis.text.x=element_blank(),
legend.position = "none")
#ggtitle(paste0("Dataset: ", names(l.phases)[i]))
print(m)
}
## [1] -5.477092
## [1] 1.278507
ggpubr::ggarrange(plotlist=g,
nrow = 1, ncol = 2,
widths = c(1,1), labels = NA)
Results:
This step is not required here. Most likely, we can use the following step to identify smaller clusters within a module of interest (see scripts 02 and 03)
# make a list to save the cluster information
my_gene_col <- list()
# Filter the zscores to keep only rhythmic genes
for (i in 1:length(rhy.24)){
writeLines(paste0("Clustering and plotting heatmaps for ", sample.name[[i]]))
# filter the zscores for the rhythmic geneset
zscore.rhy <-
zscore.dat[[i]] %>%
# Keep only sig. 24h-rhythmic genes in control Cflo heads
filter(gene_name %in% rhy.24[[i]]) %>%
## Concat. data from ophio- and beau-infected Cflo heads
# left_join(zscore.dat[[2]], by="gene_name") %>%
as.data.frame()
# Set genes as rownames and convert it into a matrix
rownames(zscore.rhy) = zscore.rhy$gene_name
zscore.rhy <- as.matrix(zscore.rhy[-1])
# Hierarchical clustering the geneset
my_hclust_gene <- hclust(dist(zscore.rhy), method = "complete")
# Make annotations for the heatmaps
my_gene_col[[i]] <- cutree(tree = as.dendrogram(my_hclust_gene), k = 4) # four clusters
my_gene_col[[i]] <- data.frame(cluster = my_gene_col[[i]])
# I’ll add some column annotations and create the heatmap.
# Annotations for:
# 1. Is the sample collected during the light or dark phase?
my_sample_col <-
data.frame(phase = rep(rep(c("light", "dark"), c(6,6)),1),
conds = rep(sample.name[i], each=12))
row.names(my_sample_col) <- colnames(zscore.rhy)
# Manual color palette
my_colour = list(
phase = c(light = "#F2E18D", dark = "#010440"),
conds = c(my.colors[i]),
cluster = viridis::cividis(100)[c(seq(10,90,by=round(80/(n_clusters), 0)))])
# Color scale
my.breaks = seq(min(na.omit(zscore.rhy)), max(na.omit(zscore.rhy)), by=0.06)
# my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
# Let's plot!
pheatmap(zscore.rhy, show_rownames = F, show_colnames = F,
annotation_row = my_gene_col[[i]],
annotation_col = my_sample_col,
# cutree_rows = 4,
# cutree_cols = 3,
# gaps_col = c(12,24),
annotation_colors = my_colour,
border_color=FALSE,
cluster_rows = T,
cluster_cols = F,
breaks = my.breaks,
## color scheme borrowed from:
color = inferno(length(my.breaks) - 1),
treeheight_row = 0,
# treeheight_col = 0,
# remove the color scale or not
main = paste0(sample.name[[i]], "\n n(genes) = ",
length(rownames(zscore.rhy))),
## annotation legend
annotation_legend = T,
## Color scale
legend = T
)
}
## Clustering and plotting heatmaps for pbar_ld
## Clustering and plotting heatmaps for pbar_dd
The differentially rhythmic genes (DRGs) here refer to genes that undergo changes in their periodicity of rhythmic expression (8h, 12h, or 24h) between different conditions. For example, the ~300 genes that switches from oscillating every 24h in forager brains to every 8h in nurses.
# Let's load functions for running limorhyde
# source(system.file('extdata', 'vignette_functions.R', package = 'limorhyde'))
# Let's load the libraries required for running Limorhyde
# library('annotate')
library('data.table')
library('foreach')
# library('GEOquery')
library('ggplot2')
library('knitr')
library('limma')
library('limorhyde')
conflict_prefer("union", "dplyr")
I need to create a csv file with the metadata information for the different samples collected
sample.name <- c("pbar_ld","pbar_dd")
short.name <- c("ld","dd")
time.points <- seq(1,23,2)
light.dark <- c(rep("light",times=6), rep("dark",times=6))
TC9.meta <- data.frame(title = paste0(rep(sample.name, each=12),"_ZT",time.points),
sample = paste0(rep(time.points, times=2),rep(short.name, each=12)),
genotype = rep(sample.name, each=12),
time = rep(time.points, times=2),
cond = rep(sample.name, each=12),
LD = rep(light.dark, times=2),
stringsAsFactors = F)
TC9.meta %>% glimpse()
## Rows: 24
## Columns: 6
## $ title <chr> "pbar_ld_ZT1", "pbar_ld_ZT3", "pbar_ld_ZT5", "pbar_ld_ZT7", "…
## $ sample <chr> "1ld", "3ld", "5ld", "7ld", "9ld", "11ld", "13ld", "15ld", "1…
## $ genotype <chr> "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_…
## $ time <dbl> 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 1, 3, 5, 7, 9, 11,…
## $ cond <chr> "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_…
## $ LD <chr> "light", "light", "light", "light", "light", "light", "dark",…
Now, format the metadata.
### 1.1.1 Format the meta-data ----------------
# load the meta-data
sm <- TC9.meta
# Let's format the columns in the right data-type
sm$time <- as.numeric(sm$time)
# sm$batch <- as.factor(sm$batch)
sm$LD <- as.factor(sm$LD)
# sm$location <- as.factor(sm$location)
# Let's get a glimpse of the metadata
sm %>%
glimpse()
## Rows: 24
## Columns: 6
## $ title <chr> "pbar_ld_ZT1", "pbar_ld_ZT3", "pbar_ld_ZT5", "pbar_ld_ZT7", "…
## $ sample <chr> "1ld", "3ld", "5ld", "7ld", "9ld", "11ld", "13ld", "15ld", "1…
## $ genotype <chr> "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_…
## $ time <dbl> 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 1, 3, 5, 7, 9, 11,…
## $ cond <chr> "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_ld", "pbar_…
## $ LD <fct> light, light, light, light, light, light, dark, dark, dark, d…
# Next we use limorhyde to calculate time_cos and time_sin, which are based on the first
#harmonic of a Fourier decomposition of the time column, and append them to the sm data frame.
sm = cbind(sm, limorhyde(sm$time, 'time_'))
# convert the dataframe into a data.table
sm <- data.table(sm)
# check that it worked
sm[1:5, ]
## title sample genotype time cond LD time_cos time_sin
## 1: pbar_ld_ZT1 1ld pbar_ld 1 pbar_ld light 0.9659258 0.2588190
## 2: pbar_ld_ZT3 3ld pbar_ld 3 pbar_ld light 0.7071068 0.7071068
## 3: pbar_ld_ZT5 5ld pbar_ld 5 pbar_ld light 0.2588190 0.9659258
## 4: pbar_ld_ZT7 7ld pbar_ld 7 pbar_ld light -0.2588190 0.9659258
## 5: pbar_ld_ZT9 9ld pbar_ld 9 pbar_ld light -0.7071068 0.7071068
Get the FPKM data
### 1.1.2 Format the expression data ----------------
# Control - Expressed
#
# extract the (gene-expr X time-point) data
pbar.ld.dat <-
data.db %>%
tbl(., paste0(sample.name[1] ,"_fpkm")) %>%
select(gene_name, everything()) %>%
collect()
# count the number of time points that has ≥ 1 FPKM
n.expressed <- apply(pbar.ld.dat[-1], 1, function(x) sum(x >= 1))
# subset the data and only keep the filtered genes
pbar.ld.dat <- pbar.ld.dat[which(n.expressed >=6),] # 9591 genes
# get the data for both infected ants
# initialize a list to save the datasets
emat <- list()
for (i in 2:length(sample.name)) {
# extract the (gene-expr X time-point) data
inf.dat <-
data.db %>%
tbl(., paste0(sample.name[i] ,"_fpkm")) %>%
select(gene_name, everything()) %>%
collect()
#
# count the number of time points that has ≥ 1 FPKM
n.expressed <- apply(inf.dat, 1, function(x) sum(x >= 1))
# subset the data and only keep the filtered genes
inf.dat <- inf.dat[which(n.expressed >=6),]
# Merge the two tables into one
emat[[i-1]] <- pbar.ld.dat %>%
# keep only the genes that are expressed in all conditions
# (Note: there are 4 genes that are expressed in foragers but not in nurses)
filter(gene_name %in% inf.dat$gene_name) %>%
left_join(inf.dat, by = "gene_name") %>%
as.data.frame()
# # Did it work?
# head(emat[[i-1]]) %>% print()
}
# Create the log2 transformed input matrix for identifying DEGs ----------------------------
for (i in 1:length(emat)) {
# save gene names as row names
rownames(emat[[i]]) <- emat[[i]][,1]
emat[[i]] <- emat[[i]][,-1]
# summary(emat[[i]]) %>% print()
# Let's remove NAs
emat[[i]] <- na.omit(emat[[i]])
# Need to make the emat into a matrix.
emat[[i]] <- data.matrix(emat[[i]])
# emat[[i]][1:5,1:5] %>% print()
# class(emat) # is a matrix with samples=columns and genes=rows
## log2 transform the data
emat[[i]] <- log2(emat[[i]] + 1)
}
# 1.2 Identify DEGs ----------------------------------------------
# Identify DEGs:
### Differential expression is based on the coefficient for cond in a linear model with no
### interaction terms. We pass all genes to limma, but keep results only for non-differentially
### rhythmic genes, and adjust for multiple testing accordingly.
# Set threshold for q-value (BH adjusted p-value)
q.threshold <- 0.05 # currently, using 5% FDR
log2.foldchange <- 1 # thus, any gene with a 2^(log2.foldchange) fold change in it's expression
# 1.2.1 all rhy genes -----------------------------------------------
# initialize the list to save the results of the DEG analysis to
all.DEGs <- list()
for(i in 1:length(emat)){
# Filter the metadata according to your comparison
sm.sub <- sm %>% filter(cond %in% c(sample.name[1],sample.name[i+1]))
# Define the cond column as a factor
# sm.control.ophio$genotype <- as.factor(sm$genotype)
sm.sub$cond <- as.factor(sm.sub$cond)
# Use the subsetted emat to find DEGs
design.deg = model.matrix(~ cond + time_cos + time_sin, data = sm.sub)
fit = lmFit(emat[[i]], design.deg)
fit = eBayes(fit, trend = TRUE)
# Take a look at the coefficients table
fit$coefficients %>% head()
deLimma.deg = data.table(topTable(fit, coef = 2, number = Inf), keep.rownames = TRUE)
setnames(deLimma.deg, 'rn', 'gene_name')
deLimma.deg[, adj.P.Val := p.adjust(P.Value, method = 'BH')]
setorderv(deLimma.deg, 'adj.P.Val')
# deLimma.deg %>%
# arrange(adj.P.Val) %>%
# head()
# Save the results of the DEG analysis
# save(deLimma.deg,
# file = "~/Documents/GitHub/R-scripts_zombie_ant_lab/Functions/data/DEGs/TC5_DEG_results.RData")
##
## Done.
# # Load the results of the DEG analysis (dataframe: deLimma.deg)
# load(file = "~/Documents/GitHub/R-scripts_zombie_ant_lab/Functions/data/DEGs/TC5_DEG_results.RData")
# Annotate the results to indicate the significant genes
all.DEGs[[i]] <-
deLimma.deg %>%
arrange(adj.P.Val) %>%
mutate(sig = as.factor(ifelse(adj.P.Val < q.threshold & abs(logFC) >= log2.foldchange, "yes", "no"))) %>%
mutate(set1_v_set2 = as.factor(ifelse(sig=="yes", ifelse( logFC > 0, "up", "down" ), "NA"))) %>%
mutate(inf = sample.name[i+1])
# head(all.DEGs)
writeLines(paste0("Set 1: ",sample.name[i],"\nSet 2: ", sample.name[i+1], "\n--Results of DEG analysis--"))
## How many DEGs - 5% FDR and ≥ 1 fold change in gene expression
all.DEGs[[i]] %>%
# filter(adj.P.Val < q.threshold) %>%
# filter(abs(logFC) >= 2) %>% # change the criteria here for top DEG or all DEG (logFC≥1)
filter(sig == "yes") %>%
# pull(gene_name) %>%
# exp.plot(log = T) %>%
# pluck(1) %>%
# multi.plot(rows = 5, cols = 5)
# the foraging gene
# filter(geneId == "LOC105255628") # is not sig DE
group_by(set1_v_set2) %>%
summarise(n_genes = n()) %>%
as.data.frame() %>%
## n = 81 up- and 141 down-regulated genes in Cflo heads during Ophio-infection
## (at 5% FDR; log2-fold-change ≥ 1)
print()
}
## Set 1: pbar_ld
## Set 2: pbar_dd
## --Results of DEG analysis--
## set1_v_set2 n_genes
## 1 down 1
## 2 up 10
# ## Save the results of the DEG analysis
# all.DEGs %>%
# saveRDS(., file = paste0(path_to_repo,"/results/cflo/","cflo_inf_v_control_DEGs.Rds"))
# Volcano plot
library(viridis)
# png("~/OneDrive - University of Central Florida/BD-TC5/TC5_Paper/results_figs/volcano_for_nur_DEGs.png",
# width = 14, height = 10, units = "cm", res = 300)
for (i in 1:length(all.DEGs)) {
plot <- ggplot(all.DEGs[[i]]) +
# geom_hline(yintercept = -log10(0.05), col="red", alpha=0.6) +
# geom_vline(xintercept = c(-2,2), col="grey60", alpha=0.75) +
geom_point(aes(x = logFC, y = -log10(adj.P.Val), color=sig), size = 1.5, alpha = 0.5) +
labs(x = expression(log[2]*' fold-change (LD v DD)'),
y = expression(-log[10]*' '*q[DE]),
# title = as.character(sample.name[i+1]),
color = "significant") +
# scale_x_continuous(limits = c(-5,3),
# breaks = c(-5,-4,-3,-2,-1,0,1,2,3),
# labels = c("-5","","-3","","-1","","1","","3")) +
# xlim(c(-50,50)) +
theme_Publication(20) +
scale_color_viridis(discrete = T, direction = -1, option = "viridis")
print(plot)
# annotation_custom(grid::textGrob(paste0( nrow(tc5.all.DEGs[tc5.all.DEGs$sig == "yes", ]),
# " DEGs ",
# "(tested: ", nrow(tc5.all.DEGs),"genes)")),
# xmin = -4, xmax=2, ymin = 7, ymax = 8)
# dev.off()
}
Most genes did not show a substantial change in their expression levels in DD vs. LD (fold-change < 2). How does this result compare to what is known in flies, mice, and humans?
10 genes, however, showed at least a two fold increase in Pbar brains during constant darkness. What are these genes?
up.degs <- all.DEGs[[i]] %>%
filter(sig == "yes") %>%
filter(set1_v_set2 == "up") %>%
pull(1)
down.degs <- all.DEGs[[i]] %>%
filter(sig == "yes") %>%
filter(set1_v_set2 == "down") %>%
pull(1)
all.degs <- c(up.degs, down.degs)
deg.plots <- list()
for (i in 1:length(all.degs)) {
g <- all.degs[i]
deg.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
deg.plots %>%
multi.plot(cols=3, rows=4)
## TableGrob (4 x 3) "arrange": 11 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (2-2,1-1) arrange gtable[layout]
## 5 5 (2-2,2-2) arrange gtable[layout]
## 6 6 (2-2,3-3) arrange gtable[layout]
## 7 7 (3-3,1-1) arrange gtable[layout]
## 8 8 (3-3,2-2) arrange gtable[layout]
## 9 9 (3-3,3-3) arrange gtable[layout]
## 10 10 (4-4,1-1) arrange gtable[layout]
## 11 11 (4-4,2-2) arrange gtable[layout]
pbar_annots %>%
filter(gene_name %in% all.degs) %>%
distinct() %>%
DT::datatable()
writeLines("We do not have any information for the following genes:")
## We do not have any information for the following genes:
p <- pbar_annots %>%
filter(gene_name %in% all.degs) %>%
pull(1) %>% unique()
setdiff(all.degs, p)
## [1] "LOC105428181" "LOC112552725" "LOC105424732" "LOC112552305"
writeLines("Checked NCBI gene database. All four genes are classified as uncharacterized.")
## Checked NCBI gene database. All four genes are classified as uncharacterized.
clock.genes <- c("LOC105430824", # period
"LOC105432451", # clock
"LOC105432453", # CK1 - isoform A (Doubletime)
"LOC105433038", # CK1 - isoform B
"LOC105424116", # CK2 - alpha
"LOC105424152", # CK2 - beta
"LOC105428546", # PKC
"LOC105427360", # atypical PKC
"LOC105429295", # Gsk3b or Shaggy
"LOC105433421", # nemo
"LOC105423237", # PP1b
"LOC105423565", # PP1
"LOC105433570", # rhodopsin-like A
"LOC105433575", # rhodopsin-like B
"LOC105424019", # rhodopsin-like C
"LOC105430942", # mAchR - gar2
"LOC105428149", # mAchR - DM1
"LOC105427051", # DopEcR
"LOC105424889", # PDF receptor
"LOC105424515", # PDF
"LOC105431243", # lark
"LOC105431457", # Trapped in endoderm 1
"LOC105424638" # Slowpoke
)
clock.plots <- list()
for (i in 1:length(clock.genes)) {
g <- clock.genes[i]
clock.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
clock.plots %>%
multi.plot(cols=5, rows=5)
## TableGrob (5 x 5) "arrange": 23 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (1-1,4-4) arrange gtable[layout]
## 5 5 (1-1,5-5) arrange gtable[layout]
## 6 6 (2-2,1-1) arrange gtable[layout]
## 7 7 (2-2,2-2) arrange gtable[layout]
## 8 8 (2-2,3-3) arrange gtable[layout]
## 9 9 (2-2,4-4) arrange gtable[layout]
## 10 10 (2-2,5-5) arrange gtable[layout]
## 11 11 (3-3,1-1) arrange gtable[layout]
## 12 12 (3-3,2-2) arrange gtable[layout]
## 13 13 (3-3,3-3) arrange gtable[layout]
## 14 14 (3-3,4-4) arrange gtable[layout]
## 15 15 (3-3,5-5) arrange gtable[layout]
## 16 16 (4-4,1-1) arrange gtable[layout]
## 17 17 (4-4,2-2) arrange gtable[layout]
## 18 18 (4-4,3-3) arrange gtable[layout]
## 19 19 (4-4,4-4) arrange gtable[layout]
## 20 20 (4-4,5-5) arrange gtable[layout]
## 21 21 (5-5,1-1) arrange gtable[layout]
## 22 22 (5-5,2-2) arrange gtable[layout]
## 23 23 (5-5,3-3) arrange gtable[layout]
dopamine.genes <-
pbar_annots %>%
mutate(gene_desc = clean_text(gene_desc)) %>%
filter(str_detect(gene_desc,"dopamine")) %>%
pull(gene_name) %>%
unique()
## Removed leading and trailing spaces.
##
## Removed special characters.
##
## Characters changed to lowercase.
## One of the Pbar genes (LOC105425404) is mislabelled
## LOC105425404 should be 5-HT receptor 2B
dopamine.genes <- dopamine.genes[dopamine.genes!="LOC105425404"]
dopamine.plots <- list()
for (i in 1:length(dopamine.genes)) {
g <- dopamine.genes[i]
dopamine.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
pbar_annots %>%
select(gene_name, gene_desc) %>%
filter(gene_name %in% dopamine.genes) %>%
distinct()
## # A tibble: 16 × 2
## gene_name gene_desc
## <chr> <chr>
## 1 LOC105424411 dopamine N-acetyltransferase-like isoform X1
## 2 LOC105424411 dopamine N-acetyltransferase-like isoform X2
## 3 LOC105424411 dopamine N-acetyltransferase-like isoform X3
## 4 LOC105431625 dopamine receptor 2
## 5 LOC105432456 dopamine receptor 1 isoform X3
## 6 LOC105432456 dopamine receptor 1 isoform X2
## 7 LOC105432456 dopamine receptor 1 isoform X1
## 8 LOC105432456 dopamine receptor 1 isoform X4
## 9 LOC105433448 LOW QUALITY PROTEIN: sodium-dependent dopamine transporter
## 10 LOC105428795 dopamine D2-like receptor isoform X5
## 11 LOC105428795 dopamine D2-like receptor isoform X4
## 12 LOC105428795 dopamine D2-like receptor isoform X3
## 13 LOC105428795 dopamine D2-like receptor isoform X2
## 14 LOC105428795 dopamine D2-like receptor isoform X1
## 15 LOC105423055 dopamine N-acetyltransferase-like isoform X1
## 16 LOC105423055 dopamine N-acetyltransferase-like isoform X2
dopamine.plots %>%
multi.plot(cols=2, rows=3)
## TableGrob (3 x 2) "arrange": 6 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
## 5 5 (3-3,1-1) arrange gtable[layout]
## 6 6 (3-3,2-2) arrange gtable[layout]
Only 1 unique found: LOC105428614 | 5-hydroxytryptamine receptor 2A-like
Another gene (LOC105425404) has
serotonin.genes <-
pbar_annots %>%
filter(str_detect(gene_desc,"5-hydroxytryptamine")) %>%
pull(gene_name) %>%
unique()
dopa.serotonin.genes <- c(dopamine.genes, serotonin.genes)
dopa.serotonin.plots <- list()
for (i in 1:length(dopa.serotonin.genes)) {
g <- dopa.serotonin.genes[i]
dopa.serotonin.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
dopa.serotonin.plots %>%
multi.plot(cols=3, rows=3)
## TableGrob (3 x 3) "arrange": 8 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (2-2,1-1) arrange gtable[layout]
## 5 5 (2-2,2-2) arrange gtable[layout]
## 6 6 (2-2,3-3) arrange gtable[layout]
## 7 7 (3-3,1-1) arrange gtable[layout]
## 8 8 (3-3,2-2) arrange gtable[layout]
cflo_annots %>%
filter(gene_name %in% pogo_to_cflo(serotonin.genes)) %>%
select(1:2)
## # A tibble: 2 × 2
## gene_name gene_desc
## <chr> <chr>
## 1 LOC105255693 5-hydroxytryptamine receptor 2B
## 2 LOC105257992 5-hydroxytryptamine receptor 2A
octopamine.genes <-
pbar_annots %>%
filter(str_detect(gene_desc,"octopamine")) %>%
pull(gene_name) %>%
unique()
jh.genes <-
pbar_annots %>%
filter(str_detect(gene_desc,"juvenile hormone")) %>%
pull(gene_name) %>%
unique()
ecdy.genes <-
pbar_annots %>%
filter(str_detect(gene_desc,"ecdy")) %>%
pull(gene_name) %>%
unique()
plasticity.genes <- c(octopamine.genes,
jh.genes,
ecdy.genes)
plasticity.plots <- list()
for (i in 1:length(plasticity.genes)) {
g <- plasticity.genes[i]
plasticity.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
plasticity.plots %>%
multi.plot(cols=5, rows=6)
## TableGrob (6 x 5) "arrange": 30 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (1-1,4-4) arrange gtable[layout]
## 5 5 (1-1,5-5) arrange gtable[layout]
## 6 6 (2-2,1-1) arrange gtable[layout]
## 7 7 (2-2,2-2) arrange gtable[layout]
## 8 8 (2-2,3-3) arrange gtable[layout]
## 9 9 (2-2,4-4) arrange gtable[layout]
## 10 10 (2-2,5-5) arrange gtable[layout]
## 11 11 (3-3,1-1) arrange gtable[layout]
## 12 12 (3-3,2-2) arrange gtable[layout]
## 13 13 (3-3,3-3) arrange gtable[layout]
## 14 14 (3-3,4-4) arrange gtable[layout]
## 15 15 (3-3,5-5) arrange gtable[layout]
## 16 16 (4-4,1-1) arrange gtable[layout]
## 17 17 (4-4,2-2) arrange gtable[layout]
## 18 18 (4-4,3-3) arrange gtable[layout]
## 19 19 (4-4,4-4) arrange gtable[layout]
## 20 20 (4-4,5-5) arrange gtable[layout]
## 21 21 (5-5,1-1) arrange gtable[layout]
## 22 22 (5-5,2-2) arrange gtable[layout]
## 23 23 (5-5,3-3) arrange gtable[layout]
## 24 24 (5-5,4-4) arrange gtable[layout]
## 25 25 (5-5,5-5) arrange gtable[layout]
## 26 26 (6-6,1-1) arrange gtable[layout]
## 27 27 (6-6,2-2) arrange gtable[layout]
## 28 28 (6-6,3-3) arrange gtable[layout]
## 29 29 (6-6,4-4) arrange gtable[layout]
## 30 30 (6-6,5-5) arrange gtable[layout]
pbar_annots %>%
select(gene_name, gene_desc) %>%
filter(gene_name %in% plasticity.genes) %>%
distinct() %>%
view()
vg.genes <-
pbar_annots %>%
mutate(gene_desc = clean_text(gene_desc)) %>%
filter(str_detect(gene_desc,"venom carboxylesterase")) %>%
pull(gene_name) %>%
unique()
## Removed leading and trailing spaces.
##
## Removed special characters.
##
## Characters changed to lowercase.
vg.plots <- list()
for (i in 1:length(vg.genes)) {
g <- vg.genes[i]
vg.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
pbar_annots %>%
select(gene_name, gene_desc) %>%
filter(gene_name %in% vg.genes) %>%
distinct()
## # A tibble: 2 × 2
## gene_name gene_desc
## <chr> <chr>
## 1 LOC105432365 venom carboxylesterase-6-like
## 2 LOC105429205 venom carboxylesterase-6
vg.plots %>%
multi.plot(cols=2, rows=1)
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
phase.shifted.genes <- c("LOC105430824", # period
"LOC105432451", # clock
"LOC105427360", # atypical PKC
"LOC105433421", # nemo
"LOC105430942", # mAchR - gar2
"LOC105427051", # DopEcR
"LOC105428795", # Dopamine-D2-Receptor
"LOC105428614", # Serotonin 2A Receptor
"LOC105431076", # Octopamine receptor beta 2R
"LOC105423033", # Octopamine receptro beta 1R
"LOC105424438", # Ecdysone receptor like
"LOC105422534" # Ecdysone induced 78C
)
phase.shifted.plots <- list()
for (i in 1:length(phase.shifted.genes)) {
g <- phase.shifted.genes[i]
phase.shifted.plots[[i]] <-
exp.plot(g) %>%
pluck(1) +
theme(title = element_blank(),
axis.title.y = element_blank())
}
phase.shifted.plots %>%
multi.plot(cols=3, rows=4)
## TableGrob (4 x 3) "arrange": 12 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (2-2,1-1) arrange gtable[layout]
## 5 5 (2-2,2-2) arrange gtable[layout]
## 6 6 (2-2,3-3) arrange gtable[layout]
## 7 7 (3-3,1-1) arrange gtable[layout]
## 8 8 (3-3,2-2) arrange gtable[layout]
## 9 9 (3-3,3-3) arrange gtable[layout]
## 10 10 (4-4,1-1) arrange gtable[layout]
## 11 11 (4-4,2-2) arrange gtable[layout]
## 12 12 (4-4,3-3) arrange gtable[layout]
# save(goi.list, file = paste0(path_to_repo,"/results/genes_of_interest/goi_list.RData"))